{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# admissionDx\n",
    "\n",
    "The following columns are available:\n",
    "\n",
    "* admitDxName\n",
    "* admitDxText\n",
    "\n",
    "We recommend configuring the `config.ini` file to allow for connection to the database without specifying your password each time."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/alistairewj/.local/lib/python3.5/site-packages/psycopg2/__init__.py:144: UserWarning: The psycopg2 wheel package will be renamed from release 2.8; in order to keep installing from binary please use \"pip install psycopg2-binary\" instead. For details see: <http://initd.org/psycopg/docs/install.html#binary-install-from-pypi>.\n",
      "  \"\"\")\n"
     ]
    }
   ],
   "source": [
    "# Import libraries\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import psycopg2\n",
    "import getpass\n",
    "import pdvega\n",
    "\n",
    "# for configuring connection \n",
    "from configobj import ConfigObj\n",
    "import os\n",
    "\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Database: eicu\n",
      "Username: alistairewj\n"
     ]
    }
   ],
   "source": [
    "# Create a database connection using settings from config file\n",
    "config='../db/config.ini'\n",
    "\n",
    "# connection info\n",
    "conn_info = dict()\n",
    "if os.path.isfile(config):\n",
    "    config = ConfigObj(config)\n",
    "    conn_info[\"sqluser\"] = config['username']\n",
    "    conn_info[\"sqlpass\"] = config['password']\n",
    "    conn_info[\"sqlhost\"] = config['host']\n",
    "    conn_info[\"sqlport\"] = config['port']\n",
    "    conn_info[\"dbname\"] = config['dbname']\n",
    "    conn_info[\"schema_name\"] = config['schema_name']\n",
    "else:\n",
    "    conn_info[\"sqluser\"] = 'postgres'\n",
    "    conn_info[\"sqlpass\"] = ''\n",
    "    conn_info[\"sqlhost\"] = 'localhost'\n",
    "    conn_info[\"sqlport\"] = 5432\n",
    "    conn_info[\"dbname\"] = 'eicu'\n",
    "    conn_info[\"schema_name\"] = 'public,eicu_crd'\n",
    "    \n",
    "# Connect to the eICU database\n",
    "print('Database: {}'.format(conn_info['dbname']))\n",
    "print('Username: {}'.format(conn_info[\"sqluser\"]))\n",
    "if conn_info[\"sqlpass\"] == '':\n",
    "    # try connecting without password, i.e. peer or OS authentication\n",
    "    try:\n",
    "        if (conn_info[\"sqlhost\"] == 'localhost') & (conn_info[\"sqlport\"]=='5432'):\n",
    "            con = psycopg2.connect(dbname=conn_info[\"dbname\"],\n",
    "                                   user=conn_info[\"sqluser\"])            \n",
    "        else:\n",
    "            con = psycopg2.connect(dbname=conn_info[\"dbname\"],\n",
    "                                   host=conn_info[\"sqlhost\"],\n",
    "                                   port=conn_info[\"sqlport\"],\n",
    "                                   user=conn_info[\"sqluser\"])\n",
    "    except:\n",
    "        conn_info[\"sqlpass\"] = getpass.getpass('Password: ')\n",
    "\n",
    "        con = psycopg2.connect(dbname=conn_info[\"dbname\"],\n",
    "                               host=conn_info[\"sqlhost\"],\n",
    "                               port=conn_info[\"sqlport\"],\n",
    "                               user=conn_info[\"sqluser\"],\n",
    "                               password=conn_info[\"sqlpass\"])\n",
    "query_schema = 'set search_path to ' + conn_info['schema_name'] + ';'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Examine a single patient"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "patientunitstayid = 2704494"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>patientunitstayid</th>\n",
       "      <th>admissiondxid</th>\n",
       "      <th>admitdxenteredyear</th>\n",
       "      <th>admitdxenteredtime24</th>\n",
       "      <th>admitdxenteredtime</th>\n",
       "      <th>admitdxenteredoffset</th>\n",
       "      <th>admitdxpath</th>\n",
       "      <th>admitdxname</th>\n",
       "      <th>admitdxtext</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2704494</td>\n",
       "      <td>7981672</td>\n",
       "      <td>2014</td>\n",
       "      <td>04:45:42</td>\n",
       "      <td>morning</td>\n",
       "      <td>20</td>\n",
       "      <td>admission diagnosis|Non-operative Organ System...</td>\n",
       "      <td>Hematology</td>\n",
       "      <td>Hematology</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2704494</td>\n",
       "      <td>7981671</td>\n",
       "      <td>2014</td>\n",
       "      <td>04:45:42</td>\n",
       "      <td>morning</td>\n",
       "      <td>20</td>\n",
       "      <td>admission diagnosis|Was the patient admitted f...</td>\n",
       "      <td>No</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2704494</td>\n",
       "      <td>7981673</td>\n",
       "      <td>2014</td>\n",
       "      <td>04:45:42</td>\n",
       "      <td>morning</td>\n",
       "      <td>20</td>\n",
       "      <td>admission diagnosis|All Diagnosis|Non-operativ...</td>\n",
       "      <td>Thrombocytopenia</td>\n",
       "      <td>Thrombocytopenia</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   patientunitstayid  admissiondxid  admitdxenteredyear admitdxenteredtime24  \\\n",
       "0            2704494        7981672                2014             04:45:42   \n",
       "1            2704494        7981671                2014             04:45:42   \n",
       "2            2704494        7981673                2014             04:45:42   \n",
       "\n",
       "  admitdxenteredtime  admitdxenteredoffset  \\\n",
       "0            morning                    20   \n",
       "1            morning                    20   \n",
       "2            morning                    20   \n",
       "\n",
       "                                         admitdxpath       admitdxname  \\\n",
       "0  admission diagnosis|Non-operative Organ System...        Hematology   \n",
       "1  admission diagnosis|Was the patient admitted f...                No   \n",
       "2  admission diagnosis|All Diagnosis|Non-operativ...  Thrombocytopenia   \n",
       "\n",
       "        admitdxtext  \n",
       "0        Hematology  \n",
       "1                No  \n",
       "2  Thrombocytopenia  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "query = query_schema + \"\"\"\n",
    "select *\n",
    "from admissiondx\n",
    "where patientunitstayid = {}\n",
    "\"\"\".format(patientunitstayid)\n",
    "\n",
    "df = pd.read_sql_query(query, con)\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We can see there are only 3 records for this patient. Let's examine the `admitdxpath`, `admitdxname`, and `admitdxtext` values in detail."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Row 1\n",
      "  admitdxpath: admission diagnosis|Non-operative Organ Systems|Organ System|Hematology\n",
      "  admitdxname: Hematology\n",
      "  admitdxtext: Hematology\n",
      "Row 2\n",
      "  admitdxpath: admission diagnosis|Was the patient admitted from the O.R. or went to the O.R. within 4 hours of admission?|No\n",
      "  admitdxname: No\n",
      "  admitdxtext: No\n",
      "Row 3\n",
      "  admitdxpath: admission diagnosis|All Diagnosis|Non-operative|Diagnosis|Hematology|Thrombocytopenia\n",
      "  admitdxname: Thrombocytopenia\n",
      "  admitdxtext: Thrombocytopenia\n"
     ]
    }
   ],
   "source": [
    "for i, row in df.iterrows():\n",
    "    print('Row {}'.format(i+1))\n",
    "    for c in ['admitdxpath', 'admitdxname', 'admitdxtext']:\n",
    "        print('  {}: {}'.format(c, row[c]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We can see that these rows are providing us: (1) the APACHE diagnosis, (2) auxiliary admission diagnosis information. Row 1 informs us of the organ system (Hematology), row 2 informs us that the patient was not admitted to/from the operating room (O.R.), and row 3 gives us the full APACHE IV admission diagnosis (thrombocytopenia)."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Summarize diagnoses"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "143620   admission diagnosis|Was the patient admitted from the O.R. or went to the O.R. within 4 hours of admission?|No\n",
      " 63930   admission diagnosis|Non-operative Organ Systems|Organ System|Cardiovascular\n",
      " 34243   admission diagnosis|Was the patient admitted from the O.R. or went to the O.R. within 4 hours of admission?|Yes\n",
      " 30577   admission diagnosis|Elective|Yes\n",
      " 25750   admission diagnosis|Non-operative Organ Systems|Organ System|Neurologic\n",
      " 23230   admission diagnosis|Non-operative Organ Systems|Organ System|Respiratory\n",
      " 15630   admission diagnosis|Operative Organ Systems|Organ System|Cardiovascular\n",
      " 12758   admission diagnosis|Additional APACHE  Information|Thrombolytic Therapy received within 24 hours|No\n",
      " 11830   admission diagnosis|Non-operative Organ Systems|Organ System|Gastrointestinal\n",
      "  8862   admission diagnosis|All Diagnosis|Non-operative|Diagnosis|Cardiovascular|Sepsis, pulmonary\n"
     ]
    }
   ],
   "source": [
    "query = query_schema + \"\"\"\n",
    "select admitdxpath, count(*) as numobs\n",
    "from admissiondx\n",
    "group by admitdxpath\n",
    "order by numobs desc\n",
    "\"\"\"\n",
    "\n",
    "df_grp = pd.read_sql_query(query, con)\n",
    "for i, row in df_grp.head(n=10).iterrows():\n",
    "    print('{:6d}   {}'.format(row['numobs'], row['admitdxpath']))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Here we can see the most popular information present in the table: admission from OR, admission for elective surgery, popular body systems, and popular diagnoses."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Patients missing data\n",
    "\n",
    "Patients with APACHE scores almost always have a diagnosis in the admissiondx table."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>n</th>\n",
       "      <th>n_with_dx</th>\n",
       "      <th>n_with_apachepred</th>\n",
       "      <th>n_pred_with_no_dx</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>200859</td>\n",
       "      <td>177863</td>\n",
       "      <td>146696</td>\n",
       "      <td>389</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        n  n_with_dx  n_with_apachepred  n_pred_with_no_dx\n",
       "0  200859     177863             146696                389"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "query = query_schema + \"\"\"\n",
    "with dx as\n",
    "(\n",
    "select distinct patientunitstayid from admissiondx\n",
    "where admitdxpath like '%|All Diagnosis|%'\n",
    ")\n",
    ", apv as\n",
    "(\n",
    "select patientunitstayid\n",
    "from apachepatientresult\n",
    "where apacheversion = 'IVa'\n",
    "and apachescore <> '-1'\n",
    ")\n",
    "select count(pt.patientunitstayid) as n\n",
    ", count(dx.patientunitstayid) as n_with_dx\n",
    ", count(apv.patientunitstayid) as n_with_apachepred\n",
    ", sum(\n",
    "case when apv.patientunitstayid is not null\n",
    "and dx.patientunitstayid is null\n",
    "then 1 else 0 end) as n_pred_with_no_dx\n",
    "from patient pt\n",
    "left join dx\n",
    "on pt.patientunitstayid = dx.patientunitstayid\n",
    "left join apv\n",
    "on pt.patientunitstayid = apv.patientunitstayid\n",
    "\"\"\"\n",
    "\n",
    "df_ct = pd.read_sql_query(query, con)\n",
    "df_ct"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Identifying patients with a specific admission diagnosis"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's look for patients who were admitted with sepsis. First, we search for these diagnoses to manually curate them."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "import re\n",
    "# remove prefixes from admission dx strings for simplicity\n",
    "def remove_prefix(text, prefix):\n",
    "    return re.sub(r'^{0}'.format(re.escape(prefix)), '', text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "426 diagnoses in total.\n",
      "7 contain the dx string sepsis.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>dx</th>\n",
       "      <th>numobs</th>\n",
       "      <th>valid_dx</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Cardiovascular|Sepsis, pulmonary</td>\n",
       "      <td>8862</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Cardiovascular|Infarction, acute myocardial (MI)</td>\n",
       "      <td>7228</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Neurology|CVA, cerebrovascular accident/stroke</td>\n",
       "      <td>6647</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Cardiovascular|CHF, congestive heart failure</td>\n",
       "      <td>6617</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Cardiovascular|Sepsis, renal/UTI (including bl...</td>\n",
       "      <td>5273</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                  dx  numobs  valid_dx\n",
       "0                   Cardiovascular|Sepsis, pulmonary    8862         1\n",
       "1   Cardiovascular|Infarction, acute myocardial (MI)    7228         0\n",
       "2     Neurology|CVA, cerebrovascular accident/stroke    6647         0\n",
       "3       Cardiovascular|CHF, congestive heart failure    6617         0\n",
       "4  Cardiovascular|Sepsis, renal/UTI (including bl...    5273         1"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dx = 'sepsis'\n",
    "query = query_schema + \"\"\"\n",
    "select \n",
    "  admitdxpath\n",
    "  , count(*) as numobs\n",
    "  , max(case when lower(admitdxpath) like '%{}%' then 1 else 0 end) as valid_dx\n",
    "from admissiondx\n",
    "-- isolate to just diagnoses (not auxiliary info like elective admission)\n",
    "where admitdxpath like '%|All Diagnosis|%'\n",
    "group by admitdxpath\n",
    "order by numobs desc\n",
    "\"\"\".format(dx)\n",
    "\n",
    "df_dx = pd.read_sql_query(query, con)\n",
    "print('{} diagnoses in total.'.format(df_dx.shape[0]))\n",
    "print('{} contain the dx string {}.'.format(df_dx['valid_dx'].sum(), dx))\n",
    "\n",
    "# trim some of the text from the admitdxpath\n",
    "df_dx['dx'] = df_dx['admitdxpath'].apply(lambda x: remove_prefix(x, 'admission diagnosis|All Diagnosis|'))\n",
    "\n",
    "# add operative/non-operative\n",
    "df_dx['op'] = df_dx['dx'].apply(lambda x: x[0:13] == 'Non-operative')\n",
    "\n",
    "# strip operative/non-operative text\n",
    "df_dx['dx'] = df_dx['dx'].apply(lambda x: remove_prefix(x, 'Non-operative|Diagnosis|'))\n",
    "df_dx['dx'] = df_dx['dx'].apply(lambda x: remove_prefix(x, 'Operative|Diagnosis|'))\n",
    "\n",
    "df_dx[['dx','numobs','valid_dx']].head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Since there are only 426 diagnoses, we can manually review and confirm that we cover all possible sepsis diagnoses. However, even still, keep in mind that these diagnoses are chosen according to the APACHE scoring criteria. For example, if a patient was admitted with sepsis secondary to a surgery, then the admission diagnosis would need to be the surgery they were originally hospitalized for. Thus we can consider these diagnoses specific, but not necessarily sensitive."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Using the following sepsis diagnoses:\n",
      "'admission diagnosis|All Diagnosis|Non-operative|Diagnosis|Cardiovascular|Sepsis, pulmonary'\n",
      "'admission diagnosis|All Diagnosis|Non-operative|Diagnosis|Cardiovascular|Sepsis, renal/UTI (including bladder)'\n",
      "'admission diagnosis|All Diagnosis|Non-operative|Diagnosis|Cardiovascular|Sepsis, GI'\n",
      "'admission diagnosis|All Diagnosis|Non-operative|Diagnosis|Cardiovascular|Sepsis, unknown'\n",
      "'admission diagnosis|All Diagnosis|Non-operative|Diagnosis|Cardiovascular|Sepsis, cutaneous/soft tissue'\n",
      "'admission diagnosis|All Diagnosis|Non-operative|Diagnosis|Cardiovascular|Sepsis, other'\n",
      "'admission diagnosis|All Diagnosis|Non-operative|Diagnosis|Cardiovascular|Sepsis, gynecologic'\n",
      "\n",
      "23136 admissions found with an admission diagnosis of sepsis.\n"
     ]
    }
   ],
   "source": [
    "sepsis_dx = df_dx.loc[df_dx['valid_dx']==1, 'admitdxpath'].values\n",
    "sepsis_dx = [\"'\" + x + \"'\" for x in sepsis_dx]\n",
    "\n",
    "print('Using the following sepsis diagnoses:')\n",
    "print('{}'.format('\\n'.join(sepsis_dx)))\n",
    "\n",
    "sepsis_dx = ','.join(sepsis_dx)\n",
    "\n",
    "print()\n",
    "query = query_schema + \"\"\"\n",
    "select \n",
    "  patientunitstayid, admitdxpath\n",
    "from admissiondx\n",
    "where admitdxpath in\n",
    "(\n",
    "{}\n",
    ")\n",
    "\"\"\".format(sepsis_dx)\n",
    "\n",
    "df = pd.read_sql_query(query, con)\n",
    "print('{} admissions found with an admission diagnosis of {}.'.format(df.shape[0], dx))"
   ]
  }
 ],
 "metadata": {
  "front-matter": {
   "date": "2015-09-01",
   "linktitle": "admissiondrug",
   "title": "admissiondrug"
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
